load("Incarceration data and controls 1970 - 2020.RData")


############################
#Sentencing variable measures
#############################

#check reliability
library(psych)
library(lavaan)


alpha(incar_data[,c(23:39)]) #Cronbach's alpha for reforms
alpha(incar_data[,c(42:58)]) ##Cronbach's alpha for enhancements

#check eigenvalues
reforms<-incar_data[,c(1,23:39)]
sentences<-incar_data[,c(1,42:58)]
colnames(reforms)[-1]<-colnames(sentences)[-1]<-paste("x",1:17,sep="")

joint<-cbind(sentences[,-1],reforms[ ,c(2:5,7:11,13:14,16:18)])
colnames(joint)[1:17]<-paste("sentence",colnames(joint)[1:17],sep="_")

summary(efa(joint,nfactors=2,rotation="varimax"))



###now do multilevel CFA
#reforms
reforms_mod<-'
        reforms=~x1+x2+x3+x4+x6+x7+x8+x9+x10+x12+x13+x15+x16+x17

'

results<-cfa(reforms_mod,data=reforms,cluster="State")
incar_data$reforms_var<-lavPredict(results)


#enhancements
colnames(sentences)[-1]<-paste("x",1:17,sep="")

sentences_mod<-'
        enhancements=~x1+x2+x3+x4+x5+x6+x7+x8+x9+x10+x11+x12+x13+x14+x15+x16+x17

'
results<-cfa(sentences_mod,data=sentences,cluster="State")
incar_data$sentencing_var<-lavPredict(results)



###################################3
# Preprocess data
###################################

##Impute missing data for controls
library(Amelia)
imp.dat<-amelia(incar_data[,-c(3:4,25:41,44:60)],m=1,ts="year",cs="State")
imp.dat<-imp.dat$imputations$imp1
imp.dat<-cbind(imp.dat,incar_data[,c(25:41,44:60)])
imp.dat$repub_str<-imp.dat$repub.governor+imp.dat$repub_chamber


#create lags for ECM
library(dplyr)
incar_data<-imp.dat
vars_to_lag<-colnames(incar_data)[3:ncol(incar_data)]

for(i in 1:length(vars_to_lag)){
  
  for(k in 1:20){
    new_col_name<-paste("lag",k,"_",vars_to_lag[i],sep="")
    
    incar_data<-incar_data %>%
      group_by(State) %>%
      mutate(!!sym(new_col_name) := lag(!!(sym(vars_to_lag[i])), n=k, order_by=State))
  }
  print(i)
}



#calculate differences
incar_data$diff.incar<-incar_data$incar.rate-incar_data$lag1_incar.rate
incar_data$diff.viol<-incar_data$viol.crime.rate-incar_data$lag1_viol.crime.rate
incar_data$diff.prop<-incar_data$prop.rate-incar_data$lag1_prop.rate


#check stationarity
library(plm)
b<-pdata.frame(incar_data[,c("incar.rate","diff.incar",
                             "State","year")],index=c("State","year"))
purtest(b$incar.rate,data=b)
purtest(b$diff.incar,data=b) #stationary after detrending




#####################################
#     Table 1
#####################################

library(oaxaca)
formula<-"diff.incar~lag1_incar.rate+
  
            lag2_viol.crime.rate+
             lag1_prop.rate+

             lag1_drug.rate+

             lag1_reforms_var+

             lag6_sentencing_var+

             lag1_mood+

             lag1_prct.repub+

             lag1_repub_str+

             lag1_Gini+

             lag1_unemp.rate+

             lag1_fiscal_cap+

             lag1_pct.black | pre_recon"
formula<-as.formula(formula)

#1994
incar_data$pre_recon<-0
incar_data$pre_recon[incar_data$year<=1994]<-1
decomp<-oaxaca(formula,data=incar_data,R=500)

decomp$twofold$overall
decomp$y

vars<-decomp$twofold$variables[[5]]
vars


#2001
incar_data$pre_recon<-0
incar_data$pre_recon[incar_data$year<=2001]<-1
decomp<-oaxaca(formula,data=incar_data,R=500)

decomp$twofold$overall
decomp$y

vars<-decomp$twofold$variables[[5]]
vars

#2008
incar_data$pre_recon<-0
incar_data$pre_recon[incar_data$year<=2008]<-1
decomp<-oaxaca(formula,data=incar_data,R=500)

decomp$twofold$overall
decomp$y

vars<-decomp$twofold$variables[[5]]
vars







###########################################
# Create regional decompositions--Figure 1
###########################################

#94
incar_data$pre_recon<-0
incar_data$pre_recon[incar_data$year<=1994]<-1
incar_data$midwest<-0
incar_data$midwest[incar_data$south==0&
                     incar_data$northeast==0&
                     incar_data$west==0]<-1



regions<-c("midwest","south","northeast","west")
region_overall<-data.frame()
region_vars<-data.frame()

for(i in 1:length(regions)){
  subset<-incar_data[incar_data[,regions[i]]==1,]
  r_decomp<-oaxaca(formula,data=subset,R=500)
  r_vars<-r_decomp$twofold$variables[[5]]
  
  region_overall<-rbind.data.frame(region_overall,
                                   data.frame(y.diff=r_decomp$y$y.diff,
                                              explained=r_decomp$twofold$overall[5,2],
                                              region=regions[i]))
  
  region_vars<-rbind.data.frame(region_vars,
                                data.frame(names=rownames(r_vars),
                                           coef=r_vars[,2],
                                           SE=r_vars[,3],
                                           perc_exp=(r_vars[,2]/region_overall$y.diff[i])*100,
                                           region=regions[i]))
}




gg_data<-region_vars[region_vars$names%in%c("lag2_viol.crime.rate",
                                            "lag1_prop.rate",
                                            "lag1_reforms_var",
                                            "lag6_sentencing_var"),]

gg_data$low_CI<-(gg_data$coef)-(1.96*gg_data$SE)
gg_data$upp_CI<-(gg_data$coef)+(1.96*gg_data$SE)

gg_data$xlab<-rep(c(" Violent\nCrime","Property\nCrime", "Sentencing\nReform","Sentencing\nPolicy"),4)
gg_data$region<-c(rep("Midwest",4),rep("South",4),rep("Northeast",4),rep("West",4))

library(ggplot2)
gg_data$perc_low_CI<-(gg_data$perc_exp)-1.96*(100*(gg_data$SE/rep(region_overall$y.diff,each=4)))
gg_data$perc_upp_CI<-(gg_data$perc_exp)+1.96*(100*(gg_data$SE/rep(region_overall$y.diff,each=4)))



plot_94<-ggplot(gg_data,aes(x=xlab, 
                            y=perc_exp,
                            ymin=perc_low_CI, 
                            ymax=perc_upp_CI,
                            fill=region)) +
  geom_bar( color="black",position=position_dodge(),stat="identity", alpha=0.9) +
  geom_errorbar( width=0.1, colour="black", alpha=0.4,
                 position = position_dodge(.9))+
  #  facet_grid(.~region)+
  ylab("Percent explained")+
  xlab("")+
  scale_fill_manual(values=c("dodgerblue3",
                             "gold3",
                             "grey67",
                             "white"))+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
        legend.title=element_blank())+
  ggtitle("A. Pre/post 1994")








#2001
incar_data$pre_recon<-0
incar_data$pre_recon[incar_data$year<=2001]<-1


##gg_decomps
regions<-c("midwest","south","northeast","west")
region_overall<-data.frame()
region_vars<-data.frame()

for(i in 1:length(regions)){
  subset<-incar_data[incar_data[,regions[i]]==1,]
  r_decomp<-oaxaca(formula,data=subset,R=500)
  r_vars<-r_decomp$twofold$variables[[5]]
  
  region_overall<-rbind.data.frame(region_overall,
                                   data.frame(y.diff=r_decomp$y$y.diff,
                                              explained=r_decomp$twofold$overall[5,2],
                                              region=regions[i]))
  
  region_vars<-rbind.data.frame(region_vars,
                                data.frame(names=rownames(r_vars),
                                           coef=r_vars[,2],
                                           SE=r_vars[,3],
                                           perc_exp=(r_vars[,2]/region_overall$y.diff[i])*100,
                                           region=regions[i]))
}




gg_data<-region_vars[region_vars$names%in%c("lag2_viol.crime.rate",
                                            "lag1_prop.rate",
                                            "lag1_reforms_var",
                                            "lag6_sentencing_var"),]

gg_data$low_CI<-(gg_data$coef)-(1.96*gg_data$SE)
gg_data$upp_CI<-(gg_data$coef)+(1.96*gg_data$SE)

gg_data$xlab<-rep(c(" Violent\nCrime","Property\nCrime", "Sentencing\nReform","Sentencing\nPolicy"),4)
gg_data$region<-c(rep("Midwest",4),rep("South",4),rep("Northeast",4),rep("West",4))

library(ggplot2)
gg_data$perc_low_CI<-(gg_data$perc_exp)-1.96*(100*(gg_data$SE/rep(region_overall$y.diff,each=4)))
gg_data$perc_upp_CI<-(gg_data$perc_exp)+1.96*(100*(gg_data$SE/rep(region_overall$y.diff,each=4)))



plot_01<-ggplot(gg_data,aes(x=xlab, 
                            y=perc_exp,
                            ymin=perc_low_CI, 
                            ymax=perc_upp_CI,
                            fill=region)) +
  geom_bar( color="black",position=position_dodge(),stat="identity", alpha=0.9) +
  geom_errorbar( width=0.1, colour="black", alpha=0.4,
                 position = position_dodge(.9))+
  #  facet_grid(.~region)+
  ylab("Percent explained")+
  xlab("")+
  scale_fill_manual(values=c("dodgerblue3",
                             "gold3",
                             "grey67",
                             "white"))+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
        legend.title=element_blank())+
  ggtitle("B. Pre/post 2001")










#2008
incar_data$pre_recon<-0
incar_data$pre_recon[incar_data$year<=2008]<-1


##gg_decomps
regions<-c("midwest","south","northeast","west")
region_overall<-data.frame()
region_vars<-data.frame()

for(i in 1:length(regions)){
  subset<-incar_data[incar_data[,regions[i]]==1,]
  r_decomp<-oaxaca(formula,data=subset,R=500)
  r_vars<-r_decomp$twofold$variables[[5]]
  
  region_overall<-rbind.data.frame(region_overall,
                                   data.frame(y.diff=r_decomp$y$y.diff,
                                              explained=r_decomp$twofold$overall[5,2],
                                              region=regions[i]))
  
  region_vars<-rbind.data.frame(region_vars,
                                data.frame(names=rownames(r_vars),
                                           coef=r_vars[,2],
                                           SE=r_vars[,3],
                                           perc_exp=(r_vars[,2]/region_overall$y.diff[i])*100,
                                           region=regions[i]))
}




gg_data<-region_vars[region_vars$names%in%c("lag2_viol.crime.rate",
                                            "lag1_prop.rate",
                                            "lag1_reforms_var",
                                            "lag6_sentencing_var"),]

gg_data$low_CI<-(gg_data$coef)-(1.96*gg_data$SE)
gg_data$upp_CI<-(gg_data$coef)+(1.96*gg_data$SE)

gg_data$xlab<-rep(c(" Violent\nCrime","Property\nCrime", "Sentencing\nReform","Sentencing\nPolicy"),4)
gg_data$region<-c(rep("Midwest",4),rep("South",4),rep("Northeast",4),rep("West",4))

library(ggplot2)
gg_data$perc_low_CI<-(gg_data$perc_exp)-1.96*(100*(gg_data$SE/rep(region_overall$y.diff,each=4)))
gg_data$perc_upp_CI<-(gg_data$perc_exp)+1.96*(100*(gg_data$SE/rep(region_overall$y.diff,each=4)))



plot_08<-ggplot(gg_data,aes(x=xlab, 
                            y=perc_exp,
                            ymin=perc_low_CI, 
                            ymax=perc_upp_CI,
                            fill=region)) +
  geom_bar( color="black",position=position_dodge(),stat="identity", alpha=0.9) +
  geom_errorbar( width=0.1, colour="black", alpha=0.4,
                 position = position_dodge(.9))+
  #  facet_grid(.~region)+
  ylab("Percent explained")+
  xlab("")+
  scale_fill_manual(values=c("dodgerblue3",
                             "gold3",
                             "grey67",
                             "white"))+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
        legend.title=element_blank())+
  ggtitle("C. Pre/post 2008")



#Figure 1
library(ggpubr)

ggarrange(plot_94,plot_01,plot_08,
          nrow=3,
          common.legend = TRUE,
          legend = "right")
